%matplotlib inline
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim
import requests
from torchvision import transforms, models
# Load the pretrained VGG-19 and keep only its convolutional feature stack.
vgg = models.vgg19(pretrained=True)
# Replace every max-pooling layer with average pooling (smoother gradients
# for image optimization, as suggested in the Gatys et al. paper).
for idx, module in enumerate(vgg.features):
    if isinstance(module, torch.nn.MaxPool2d):
        vgg.features[idx] = torch.nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
vgg = vgg.features
# Freeze all weights: only the target image is optimized, never the network.
for p in vgg.parameters():
    p.requires_grad_(False)
# Run on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg.to(device)
def load_image(img_path, max_size=512, shape=None):
    """Load an image file as a normalized (1, 3, H, W) float tensor.

    The longer side is capped at ``max_size`` unless an explicit ``shape``
    is supplied (used to make the style image match the content image).
    Normalization uses the ImageNet mean/std expected by VGG-19.
    """
    image = Image.open(img_path).convert('RGB')
    # cap the larger dimension, unless the caller pins an exact shape
    size = min(max(image.size), max_size)
    if shape is not None:
        size = shape
    preprocess = transforms.Compose([
        transforms.Resize(size),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225)),
    ])
    # keep the RGB channels only and prepend a batch dimension
    return preprocess(image)[:3, :, :].unsqueeze(0)
# load in content and style image
# NOTE(review): paths are relative to the working directory — assumes
# 'pp1.jpg' (content) and 'mona.jpg' (style) exist there.
content = load_image('pp1.jpg').to(device)
# Resize style to the content's spatial size so the feature maps line up
style = load_image('mona.jpg', shape=content.shape[-2:]).to(device)
# helper: undo the ImageNet normalization and convert a (1, 3, H, W) tensor
# into an (H, W, 3) NumPy array that plt.imshow can display
def im_convert(tensor):
    """Un-normalize *tensor* and return it as an (H, W, 3) NumPy image in [0, 1]."""
    mean = np.array((0.485, 0.456, 0.406))
    std = np.array((0.229, 0.224, 0.225))
    img = tensor.to("cpu").clone().detach().numpy().squeeze()
    img = img.transpose(1, 2, 0)   # CHW -> HWC
    img = img * std + mean         # invert transforms.Normalize
    return img.clip(0, 1)          # clamp to a displayable range
# sanity check: show the content and style images side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
for ax, img in ((ax1, content), (ax2, style)):
    ax.imshow(im_convert(img))
def get_features(image, model, layers=None):
    """Run *image* through *model* and collect intermediate activations.

    *layers* maps the module's index (as a string) to a friendly layer
    name; when omitted, the standard Gatys et al. VGG-19 tap points are
    used (conv1_1..conv5_1 for style, conv4_2 for content).  Returns a
    dict of {friendly_name: activation tensor}.
    """
    if layers is None:
        layers = {'0': 'conv1_1', '5': 'conv2_1', '10': 'conv3_1',
                  '19': 'conv4_1', '21': 'conv4_2', '28': 'conv5_1'}
    features = {}
    out = image
    for name, module in model._modules.items():
        out = module(out)
        if name in layers:
            features[layers[name]] = out
    return features
# reference: https://discuss.pytorch.org/t/implementation-of-gram-matrix-in-neural-style-tutorial/46803
def gram_matrix(tensor):
    """Return the Gram (channel-correlation) matrix of a (b, d, h, w) tensor.

    Fix/generalization: the original unpacked the batch size ``b`` but then
    reshaped with ``view(d, h * w)``, which silently assumed ``b == 1`` and
    raised a RuntimeError for any real batch.  Flattening per sample and
    using batched matrix multiply handles any batch size; for ``b == 1``
    the returned (d, d) values are identical to the original, so existing
    callers are unaffected.  For ``b > 1`` the result is (b, d, d).
    """
    b, d, h, w = tensor.size()
    # flatten each sample's feature maps into d vectors of length h*w
    feats = tensor.view(b, d, h * w)
    # per-sample channel correlations: (b, d, d)
    gram = torch.bmm(feats, feats.transpose(1, 2))
    # preserve the original (d, d) shape for the single-image case
    return gram.squeeze(0) if b == 1 else gram
# get content and style features once, before training
content_features = get_features(content, vgg)
style_features = get_features(style, vgg)
# precompute the gram matrix for each layer of the style representation
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}
# create a third "target" image and prep it for optimization.
# Fix: move to the device *before* enabling grad — calling .to(device) after
# requires_grad_() yields a non-leaf copy (which Adam cannot optimize)
# whenever the source lives on a different device.  Values are unchanged
# here because `content` is already on `device`; this just removes the trap.
target = content.clone().to(device).requires_grad_(True)
# compute the combined content + style objective for the current target image
def total_Loss():
    """Return the weighted content + style loss for the module-level `target`.

    Reads the globals `target`, `vgg`, `content_features` and `style_grams`.
    """
    # per-layer emphasis for the style term
    style_weights = {'conv1_1': 0.2, 'conv2_1': 0.2, 'conv3_1': 0.3,
                     'conv4_1': 0.4, 'conv5_1': 0.7}
    content_weight = 1    # alpha
    style_weight = 1e6    # beta
    target_features = get_features(target, vgg)
    # content loss: MSE between the conv4_2 activations
    content_loss = torch.mean((target_features['conv4_2'] - content_features['conv4_2']) ** 2)
    # style loss: weighted, size-normalized gram-matrix MSE over style layers
    style_loss = 0
    for layer, weight in style_weights.items():
        feat = target_features[layer]
        _, d, h, w = feat.shape
        layer_loss = weight * torch.mean((gram_matrix(feat) - style_grams[layer]) ** 2)
        style_loss += layer_loss / (d * h * w)
    return content_weight * content_loss + style_weight * style_loss
# how often to display the intermediate target image
show_every = 400
# optimize the pixels of `target` directly
optimizer = optim.Adam([target], lr=0.003)
steps = 4000
for step in range(1, steps + 1):
    loss = total_Loss()
    # gradient step on the image
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # periodically report the loss and show the current image
    if step % show_every == 0:
        print('Total loss: ', loss.item())
        plt.imshow(im_convert(target))
        plt.show()
# show the content image next to the final stylized target
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
for ax, img in ((ax1, content), (ax2, target)):
    ax.imshow(im_convert(img))
# save the stylized result on its own, without axes
plt.imshow(im_convert(target))
plt.axis('off')
plt.savefig('result.png')
from feature_map_extraction import *
# select the image used for feature-map visualization
# NOTE(review): get_features_forVGG, myModel, plot_feature_map and `nn`
# come from the star import of feature_map_extraction — confirm.
content = load_image('mona.jpg')
vgg_19 = models.vgg19(pretrained=True)
layer_modules = get_features_forVGG(vgg_19)
conv_model = nn.Sequential(*layer_modules)
# layer selection follows the VGG-19 `features` index layout
layer_selection = ['2', '5', '8', '11', '15']
myNet = myModel(conv_model, layer_selection)
output = myNet(content)
# visualize the first selected layer (index '2' in VGG-19)
index = 0
plot_feature_map(output, index)
# visualize the last selected layer (index '15' in VGG-19)
index = 4
plot_feature_map(output, index)
def train_NST_with_parameter_set(target, layers):
    """Optimize *target* with the currently configured NST hyperparameters.

    Reads the module-level globals `style_weights`, `content_weight`,
    `style_weight`, `content_features`, `style_grams` and `vgg`; *layers*
    selects which feature taps to use.  Returns three parallel lists with
    the sampled iteration numbers, losses and image snapshots.
    """
    steps_list, loss_list, image_list = [], [], []
    show_every = 500   # sampling interval for logging and snapshots
    optimizer = optim.Adam([target], lr=0.02)
    steps = 4500       # number of update iterations (5000 originally considered)
    for ii in range(1, steps + 1):
        target_features = get_features(target, vgg, layers)
        # content term: MSE on the conv4_2 activations
        content_loss = torch.mean((target_features['conv4_2'] - content_features['conv4_2']) ** 2)
        # style term: weighted, size-normalized gram-matrix MSE
        style_loss = 0
        for layer, weight in style_weights.items():
            feat = target_features[layer]
            _, d, h, w = feat.shape
            g_target = gram_matrix(feat)
            g_style = style_grams[layer]
            style_loss += weight * torch.mean((g_target - g_style) ** 2) / (d * h * w)
        # total objective (variation-loss experiment left disabled)
        # var_loss = variation_loss(target)
        total_loss = content_weight * content_loss + style_weight * style_loss
        # gradient step on the image pixels
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        # record and display at the first step and every show_every steps
        if ii % show_every == 0 or ii == 1:
            print('Total loss: ', total_loss.item())
            plt.imshow(im_convert(target))
            steps_list.append(ii)
            loss_list.append(total_loss.item())
            image_list.append(im_convert(target))
            plt.show()
    return steps_list, loss_list, image_list
import matplotlib.pyplot as plt
def plot_sequential_fig(list_image):
    """Show a grid of image snapshots, one row per run, with no gaps."""
    n_rows = len(list_image)
    n_cols = len(list_image[0])
    fig = plt.figure(figsize=(20, 6))
    for r, row_images in enumerate(list_image):
        for c, img in enumerate(row_images):
            # subplot indices are 1-based and scan row-major
            fig.add_subplot(n_rows, n_cols, r * n_cols + c + 1)
            plt.axis('off')
            plt.imshow(img)
    fig.subplots_adjust(wspace=0.0, hspace=0.0)
import matplotlib.pyplot as plt
def plot_loss_curve(list_steps, list_losses):
    """Overlay the (log-)loss curve of every hyperparameter set on one axis."""
    for i in range(len(list_steps)):
        plt.plot(list_steps[i], list_losses[i],
                 label="hyperparameter set {}".format(i + 1))
    plt.legend()
    plt.xlabel('steps')
    plt.ylabel('log(loss)')
def take_log(a):
    """Return the element-wise natural logarithm of *a* as a list.

    Idiom fix: replaces the manual append loop with a list comprehension;
    the returned values (np.float64 per element) are unchanged.
    """
    return [np.log(element) for element in a]
# Mapping of VGG-19 `features` module index (as a string) -> layer name.
# These are the standard Gatys et al. tap points.
default_layers = {'0': 'conv1_1',
'5': 'conv2_1',
'10': 'conv3_1',
'19': 'conv4_1',
'21': 'conv4_2', ## content representation
'28': 'conv5_1'}
# Alternative tap points for the layer-choice experiment.
# NOTE(review): the names here are reused labels so the shared
# `style_weights` dict still applies — the indices actually select
# *different* VGG-19 modules than the names suggest (e.g. index '2'
# is conv1_2, not conv1_1) — confirm this is intentional.
different_layers = {'2': 'conv1_1',
'7': 'conv2_1',
'12': 'conv3_1',
'16': 'conv4_1',
'25': 'conv4_2', ## content representation
'34': 'conv5_1'}
# ---- experiment runs: each "parameter set" mutates the module-level globals
# (content_features, style_grams, style_weights, content_weight, style_weight)
# that train_NST_with_parameter_set reads, then optimizes a fresh target. ----
# load in content and style image
content = load_image('pp1.jpg').to(device)
# Resize style to match content
style = load_image('mona.jpg', shape=content.shape[-2:]).to(device)
# parameter set 1: content-initialized target, default layer taps,
# strong style emphasis (beta/alpha = 1e6)
content_features = get_features(content, vgg)
style_features = get_features(style, vgg)
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}
target = content.clone().requires_grad_(True).to(device)
#target = torch.rand(1,3,400,533, requires_grad=True, device="cuda")
# early layers weighted highest -> emphasizes the style's fine texture
style_weights = {'conv1_1': 1.,
'conv2_1': 0.75,
'conv3_1': 0.2,
'conv4_1': 0.2,
'conv5_1': 0.2}
content_weight = 1 # alpha
style_weight = 1e6 # beta
a_1, b_1, c_1 = train_NST_with_parameter_set(target, default_layers)
# parameter set 2: same as set 1 but the features are tapped at the
# "different_layers" indices (deeper conv modules of each VGG block)
content_features = get_features(content, vgg, different_layers)
style_features = get_features(style, vgg, different_layers)
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}
# content-initialized target image
target = content.clone().requires_grad_(True).to(device)
#target = torch.rand(1,3,400,533, requires_grad=True, device="cuda")
style_weights = {'conv1_1': 1.,
'conv2_1': 0.75,
'conv3_1': 0.2,
'conv4_1': 0.2,
'conv5_1': 0.2}
content_weight = 1 # alpha
style_weight = 1e6 # beta
a_2, b_2, c_2 = train_NST_with_parameter_set(target, different_layers)
# parameter set 3: noise-initialized target, default layer taps,
# equal content/style weighting (beta = 1)
content_features = get_features(content, vgg, default_layers)
style_features = get_features(style, vgg, default_layers)
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}
#target = content.clone().requires_grad_(True).to(device)
# Fix: allocate the noise image on the globally selected `device` instead of
# hard-coding "cuda", so this cell also runs on CPU-only machines.
# NOTE(review): the hard-coded 400x533 shape presumably matches the content
# image's resized dimensions — confirm against load_image's output.
target = torch.rand(1, 3, 400, 533, requires_grad=True, device=device)
style_weights = {'conv1_1': 1.,
'conv2_1': 0.75,
'conv3_1': 0.2,
'conv4_1': 0.2,
'conv5_1': 0.2}
content_weight = 1 # alpha
style_weight = 1 # beta
a_3, b_3, c_3 = train_NST_with_parameter_set(target, default_layers)
# parameter set 4: noise-initialized target, "different_layers" taps,
# equal content/style weighting (beta = 1)
content_features = get_features(content, vgg, different_layers)
style_features = get_features(style, vgg, different_layers)
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_features}
#target = content.clone().requires_grad_(True).to(device)
# Fix: allocate the noise image on the globally selected `device` instead of
# hard-coding "cuda", so this cell also runs on CPU-only machines.
target = torch.rand(1, 3, 400, 533, requires_grad=True, device=device)
style_weights = {'conv1_1': 1.,
'conv2_1': 0.75,
'conv3_1': 0.2,
'conv4_1': 0.2,
'conv5_1': 0.2}
content_weight = 1 # alpha
style_weight = 1 # beta
a_4, b_4, c_4 = train_NST_with_parameter_set(target, different_layers)
# plot the log-loss curves of the four hyperparameter sets
list_steps = [a_1, a_2, a_3, a_4]
list_losses = [take_log(losses) for losses in (b_1, b_2, b_3, b_4)]
plot_loss_curve(list_steps, list_losses)
# grid of image snapshots: one row per hyperparameter set
list_image = [c_1, c_2, c_3, c_4]
plot_sequential_fig(list_image)